Load in CSV data
shipibo_age_interval <- 2
spanish_age_interval <- 1
naming_data <- read_csv("data/naming_colors_participants.csv") %>%
left_join(read_csv("data/naming_colors_data.csv"), by = 'subj')
grouping_data <- read_csv("data/grouping_colors_participants.csv") %>%
left_join(read_csv("data/grouping_colors_data.csv"), by = 'subj')
shipibo_child_data <- read_csv("data/shipibo_children_colors_participants.csv") %>%
left_join(read_csv("data/shipibo_children_colors_data.csv"), by = 'subj')
spanish_child_data <- read_csv("data/spanish_children_colors_participants.csv") %>%
left_join(read_csv("data/spanish_children_colors_data.csv"), by = 'subj')
color_chip_data <- read_csv("data/wcs_measures.csv", skip = 1)
children_new_labels <- read_csv("data/unified_labels.csv")
graph_colors <- c(
'Amarillo' = '#FFD416',
'Ambi' = '#874A8C',
'Ami' = '#76296E',
'Azul' = '#337DCE',
'Barin Poi' = '#6D6212',
'Bexnan' = '#B6D744',
'Blanco' = '#DBDBDB',
'Celeste' = '#74DFF7',
'Chexe' = '#81C147',
'Chimapo' = '#003459',
'Emo' = '#007177',
'Gris' = '#979997',
'Jimi' = '#822158',
'Joshin' = '#BC1E47',
'Joxo' = '#DFE6F0',
'Kari' = '#571848',
'Kasho' = '#F07000',
'Keskiti' = '#E56F92',
'Koin' = '#50491D',
'Kononbi' = '#503B87',
'Konron' = '#BB8F00',
'Koro' = '#7B7B7B',
'Mai' = '#7F5A21',
'Mandi' = '#005637',
'Manxan' = '#FEBBA1',
'Marron' = '#9E5E22',
'Maxe' = '#DC4800',
'Morado' = '#B175F9',
'Nai' = '#19A2C2',
'Naranja' = '#FF6E00',
'Negro' = '#000000',
'Oxne' = '#66BCC9',
'Panshin' = '#EDC800',
'Pasna' = '#D3C5DF',
'Paxna' = '#EC99A2',
'Pei' = '#69C360',
'Pene' = '#55471E',
'Plomo' = '#848484',
'Poa' = '#7E4E94',
'Ranchesh' = '#4A2347',
'Rojo' = '#E03D28',
'Rosa' = '#FF8C9D',
'Tena' = '#C5D500',
'Verde' = '#61E27B',
'Yame' = '#666412',
'Yankon' = '#00A79E',
'Wiso' = '#272727',
'Xena' = '#D4799C',
'Xexe' = '#9769AE',
'Xo' = '#3A6E14',
'Spanish Term' = '#FF6E00'
)
spanish_terms <- c("Amarillo", "Azul", "Blanco", "Celeste", "Gris", "Marron", "Morado", "Naranja", "Negro", "Plomo", "Rojo", "Rosa", "Verde")
shipibo_terms <- c("Ambi", "Ami", "Barin Poi", "Bexnan", "Chexe", "Chimapo", "Emo", "Jimi", "Joshin", "Joxo", "Kari", "Kasho", "Keskiti", "Koin", "Kononbi", "Konron", "Koro", "Mai", "Mandi", "Manxan", "Maxe", "Nai", "Oxne", "Panshin", "Pasna", "Paxna", "Pei", "Pene", "Poa", "Ranchesh", "Tena", "Yame", "Yankon", "Wiso", "Xena", "Xexe", "Xo")
shipibo_chip_set <- read.csv(text = "shipibo, spanish, munsell_code, chip_id
Joshin, rojo, G3, 245
Pei/Xo, verde, G18, 234
Panshin, amarillo, C9, 297
Wiso, negro, J1/I0, 312
Joxo, blanco, A/B0, 274
Nai, celeste, E29, 1
Ami/Poa, morado, H36, 325
Barin poi, mierda sol, F12, 320")
spanish_chip_set <- read.csv(text = "spanish, code, munsell_code, chip_id
Blanco, BL, A/B0, 274
Verde, VD, G18, 234
Rojo, RJ, G3, 245
Amarillo, AM, C9, 297
Azul, AZ, F30, 291
Negro, NG, J1/I0, 312
Naranja, NR, E4, 121
Gris, GR, F0, 46
Morado, MRD, H36, 325
Marron, MRN, G5, 266
Rosa, RS, F39, 65")
string_spelling_list <- "`Amarillo` = c('amarilla', 'amarillo'), `Ami` = c('ami'), `Ambi` = c('ambi'), `Azul` = c('azul', 'azu'), `Barin Poi` = c('barin pui', 'barrin pui', 'barrinpui', 'pui', 'barin poi', 'barrin poi', 'bavrinpui*', 'barri'), `Bexnan` = c('berrnan', 'bexna', 'bexnan'), `Blanco` = c('blanco'), `Celeste` = c('celeste'), `Chexe` = c('chese', 'chexe'), `Chimapo` = c('chimapu'), `Emo` = c('emu'), `Gris` = c('gris'), `Jimi` = c('jimi'), `Jisa` = c('jisa'), `Joshin` = c('joshin', 'joxin', 'toshin'), `Joxo` = c('josho', 'joxo'), `Kari` = c('cari', 'carri', 'kari', 'karri'), `Kasho` = c('kashos'), `Keskiti` = c('kex keti'), `Koin` = c('kuin'), `Kononbi` = c('kunumbi'), `Konron` = c('korrum', 'kumrrum', 'kunrrum'), `Koro` = c('coro'), `Mai` = c('mai'), `Mandi` = c('mandi'), `Manxan` = c('manrran', 'manshan', 'manxam', 'manxan', 'maxan', 'maxna'), `Marron` = c('marron'), `Maxe` = c('maxe'), `Mierda Sol` = c('miarda', 'miarda del sol'), `Morado` = c('bioleta', 'morado', 'violeta', 'morada'), `Nai` = c('nai', 'nia'), `Naranja` = c('naranja', 'naranjada', 'narranxa', 'naranjado', 'narango', 'naranjo', 'anaranjado'), `Negro` = c('negro'), `Oxne` = c('oshne', 'oxne', 'oxe'), `Pei` = c('pei'), `Poa` = c('pua'), `Pene` = c('pene'), `Panshin` = c('panshin'), `Pasna` = c('paxsna', 'pasna'), `Paxna` = c('parrna', 'paxna'), `Plomo` = c('plomo'), `Ranchesh` = c('ranchex'), `Rojo` = c('rojo', 'roja'), `Rosa` = c('rosada', 'rosa', 'rosado'), `Spanish Term` = c('carne', 'agua', 'agur', 'uva color*', 'pasto payota', 'chocolate', 'coral', 'oscuro', 'lila', 'color cielo', 'cielo'), `Tena` = c('tena'), `Verde` = c('verde', 'cerde', 'verdesito'), `Wiso` = c('wiso'), `Xena` = c('xena'), `Xo` = c('xo'), `Xexe` = c('xexe', 'xexi'), `Yame` = c('rayame', 'yame'), `Yankon` = c('rayanko', 'yankom', 'yankon', 'yankum', 'yankun', 'yankontani', 'yakon', 'yakun', 'yankoncha'), `NA` = c(NA)"
spelling_list <- eval(parse(text = paste0("c(",string_spelling_list,")")))
naming_data %<>%
mutate(color_cat = ifelse(is.na(color_cat), first_response, color_cat)) %>%
mutate(color_cat = ifelse(color_cat %in% unlist(spelling_list), color_cat, NA)) %>%
mutate(color_cat = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(color_cat, x)")))
)
grouping_data %<>%
mutate(`nombre del grupo` = ifelse(`nombre del grupo` %in% unlist(spelling_list),
`nombre del grupo`, NA)) %>%
mutate(`nombre del grupo` = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(`nombre del grupo`, x)")))
)
color_chip_data %<>%
mutate(hex = colorspace::hex(
colorspace::LAB(color_chip_data$`L*`, color_chip_data$`a*`,
color_chip_data$`b*`, color_chip_data$`#cnum`), fixup = T))Import Berlin-Kay Shipibo data
kay_langs <- read_tsv("data/WCS-Data-20110316/lang.txt",
col_names = c('WCS Language Number', 'WCS Language Name',
'WCS Language Geographic Location', 'Field Worker')) %>%
filter(`WCS Language Name` == 'Shipibo')
kay_dict <- read_tsv("data/WCS-Data-20110316/dict.txt",
col_names = c('WCS Language Number', 'Term Number',
'Term', 'Term Abbreviation')) %>%
filter(`WCS Language Number` == kay_langs$`WCS Language Number`)
kay_foci <- read_tsv("data/WCS-Data-20110316/foci-exp.txt",
col_names = c("WCS Language Number", "WCS Speaker Number",
"WCS Focus Response", "Term Abbreviation",
"Single Chip")) %>%
filter(`WCS Language Number` == kay_langs$`WCS Language Number`)
kay_speaker <- read_tsv("data/WCS-Data-20110316/spkr-lsas.txt",
col_names = c("WCS Language Number", "WCS Speaker Number",
"WCS Speaker Age", "WCS Speaker Sex")) %>%
filter(`WCS Language Number` == kay_langs$`WCS Language Number`)
kay_terms <- read_tsv("data/WCS-Data-20110316/term.txt",
col_names = c('WCS Language Number', 'WCS Speaker Number',
'WCS Chip Number', 'Term Abbreviation')) %>%
filter(`WCS Language Number` == kay_langs$`WCS Language Number`) %>%
left_join(select(kay_dict, Term, `Term Abbreviation`), by = "Term Abbreviation") %>%
left_join(select(kay_speaker, -`WCS Language Number`), by = "WCS Speaker Number") %>%
mutate(Term = forcats::fct_collapse(Term,
`Ami` = c("ami"),
`Barin Poi` = c("barin poi"),
`Cana` = c("cana"),
`Chexe` = c("cheshe"),
`Chimapo` = c("chimapo"),
`Koro` = c("coro"),
`Emo` = c("emo"),
`Joshin` = c("joshin"),
`Joxo` = c("josho"),
`Kaqui` = c("kaqui"),
`Mai` = c("mai"),
`Manxan` = c("manshan"),
`Panshin` = c("panshin"),
`Pasna` = c("pashnatani"),
`Pene` = c("pene"),
`Pota'` = c("pota'"),
`Xena` = c("shane"),
`Xo` = c("shoo"),
`Yame` = c("yametani"),
`Yankon` = c("yancon"),
`Wiso` = c("huiso")
))Which terms appear to be basic and commonly used?
naming_data_profusion <- naming_data %>%
group_by(subj, color_cat) %>%
summarise(n = n()) %>%
group_by(color_cat) %>%
spread(subj, n, fill = 0) %>%
gather(key = 'subj', value = 'n', -color_cat) %>%
summarise(`% of Subjects Who Used the Term (Current)` = 100*sum(n > 0)/n(),
`Mean % of Chips in Set Labeled (Current)` = 100*mean(n)/165) %>%
dplyr::rename(`Color Term` = color_cat)
kay_naming_profusion <- kay_terms %>%
group_by(`WCS Speaker Number`, Term) %>%
summarise(n = n()) %>%
group_by(Term) %>%
spread(`WCS Speaker Number`, n, fill = 0) %>%
gather(key = 'WCS Speaker Number', value = 'n', -Term) %>%
summarise(`% of Subjects Who Used the Term (Kay)` = 100*sum(n > 0)/n(),
`Mean % of Chips in Set Labeled (Kay)` = 100*mean(n)/330) %>%
dplyr::rename(`Color Term` = Term)
naming_profusion <- full_join(naming_data_profusion, kay_naming_profusion,
by = 'Color Term') %>%
dplyr::select(`Color Term`, `% of Subjects Who Used the Term (Current)`,
`% of Subjects Who Used the Term (Kay)`, `Mean % of Chips in Set Labeled (Current)`,
`Mean % of Chips in Set Labeled (Kay)` ) %>%
dplyr::arrange(`Color Term`) %>%
mutate_if(is.double, round, digits = 2)
kay_naming_list <- as.character(na.omit(filter(naming_profusion, `% of Subjects Who Used the Term (Kay)` > 50 & !is.na(`Color Term`))$`Color Term`))
naming_list <- as.character(na.omit(filter(naming_profusion, `% of Subjects Who Used the Term (Current)` > 50 & !is.na(`Color Term`))$`Color Term`))
datatable(naming_profusion, rownames = FALSE)In our naming task with 2 sets of 165 color chips, commonly used terms include:
This is a notably smaller set of terms compared to the original WCS:
num_groups <- grouping_data %>%
filter(task == 1) %>%
group_by(subj) %>%
summarise(`# of Groups` = n_distinct(`nombre del grupo`)) %>%
ungroup() %>%
summarise(`Avg # of Groups` = mean(`# of Groups`),
`Min # of Groups` = min(`# of Groups`),
`Max # of Groups` = max(`# of Groups`))
grouping_data_profusion <- grouping_data %>%
filter(task == 1) %>%
group_by(subj, `nombre del grupo`) %>%
summarise(`cuantas tarjetas` = mean(`cuantas tarjetas`)) %>%
group_by(`nombre del grupo`) %>%
spread(subj, `cuantas tarjetas`, fill = 0) %>%
gather(key = 'subj', value = 'n', -`nombre del grupo`) %>%
summarise(`% of Subjects Who Used the Term` = 100*sum(n > 0)/n(),
`Mean % of Chips in Set Labeled` = 100*mean(n)/60) %>%
dplyr::rename(`Color Term` = `nombre del grupo`)
grouping_list <- as.character(na.omit(filter(grouping_data_profusion, `% of Subjects Who Used the Term` > 50 & !is.na(`Color Term`))$`Color Term`))
datatable(grouping_data_profusion, rownames = FALSE)In the grouping task with 60 chips, subjects usually create between 4-7 groups and mostly use terms such as:
For each color chip, how many adults label it with the same term?
consensus <- 75
naming_consensus <- naming_data %>%
select(subj, chip_id, color_cat) %>%
mutate(set = ifelse((chip_id %% 2) == 0, 'even', 'odd')) %>%
split(.$set) %>%
map_df(function(x) {
x %>%
group_by(chip_id, color_cat) %>%
summarise(n = n()) %>%
group_by(chip_id) %>%
mutate(perc = 100*n/sum(n)) %>%
select(-n)
}) %>%
arrange(chip_id) %>%
rename(`Chip ID` = chip_id, `Color Term` = color_cat, `% of Subjects` = perc)
kay_consensus <- kay_terms %>%
group_by(`WCS Chip Number`, Term) %>%
summarise(n = n()) %>%
group_by(`WCS Chip Number`) %>%
mutate(`% of Subjects` = 100*n/sum(n)) %>%
select(-n) %>%
arrange(`WCS Chip Number`)
focal_terms <- pander::p(as.character(
unique(filter(naming_consensus,`% of Subjects` >= consensus)$`Color Term`)),
wrap = '', copula = ', and ')
kay_focal_terms <- pander::p(as.character(
unique(filter(kay_consensus,`% of Subjects` >= consensus)$`Term`)),
wrap = '', copula = ', and ')
color_chip_hexes <- color_chip_data[, c('#cnum', 'hex')]
highest_chips <- (naming_consensus %>% group_by(`Color Term`) %>%
filter(`% of Subjects` >= consensus & `% of Subjects` == max(`% of Subjects`)))$`Chip ID`
kay_highest_chips <- (kay_consensus %>% group_by(`Term`) %>%
filter(`% of Subjects` >= consensus & `% of Subjects` == max(`% of Subjects`)))$`WCS Chip Number`
agreed_chips <- naming_consensus %>%
group_by(`Color Term`) %>%
filter(`% of Subjects` >= consensus) %>%
arrange(`Color Term`, `Chip ID`) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
dplyr::rename(`Hex Code` = hex) %>%
mutate(highest_chips = ifelse(`Chip ID` %in% highest_chips, 1, 0))
kay_agreed_chips <- kay_consensus %>%
group_by(Term) %>%
filter(`% of Subjects` >= consensus) %>%
arrange(`Term`, `WCS Chip Number`) %>%
left_join(color_chip_hexes,
by = c("WCS Chip Number" = "#cnum")) %>%
dplyr::rename(`Hex Code` = hex) %>%
mutate(highest_chips = ifelse(`WCS Chip Number` %in% kay_highest_chips, 1, 0))
datatable(agreed_chips, rownames = FALSE,
options=list(columnDefs = list(list(
visible=FALSE, targets=c(grep('highest_chips', names(agreed_chips))-1))))) %>%
formatStyle('highest_chips', target = 'row',
fontWeight = styleEqual(c(0,1), c('normal','bold'))) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(agreed_chips$`Hex Code`, agreed_chips$`Hex Code`))datatable(kay_agreed_chips, rownames = FALSE,
options=list(columnDefs = list(list(
visible=FALSE, targets=c(grep('highest_chips', names(kay_agreed_chips))-1))))) %>%
formatStyle('highest_chips', target = 'row',
fontWeight = styleEqual(c(0,1), c('normal','bold'))) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(kay_agreed_chips$`Hex Code`, kay_agreed_chips$`Hex Code`))The only categories with chips that reach a high level of consensus appear to be Yankon, Joshin, Panshin, Joxo, and Wiso
highest_consenus <- naming_consensus %>%
group_by(`Chip ID`) %>%
filter(`% of Subjects` == max(`% of Subjects`)) %>%
left_join(color_chip_data, by = c("Chip ID" = "#cnum")) %>%
mutate(`Spanish Term` = ifelse(`Color Term` == 'Spanish Term','Spanish','Shipibo'))
consensus_plot <- ggplot(highest_consenus,
aes(x = H, y = factor(V),
colour = `Color Term`, size = `% of Subjects`)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_shape_manual(name = 'Spanish Term',
values = c('Spanish' = 17, 'Shipibo' = 16)) +
scale_colour_manual(name = "Color Term",values = graph_colors) +
scale_y_discrete(limits = rev(levels(factor(highest_consenus$V)))) +
theme_bw()
kay_highest_consenus <- kay_consensus %>%
group_by(`WCS Chip Number`) %>%
filter(`% of Subjects` == max(`% of Subjects`)) %>%
left_join(color_chip_data, by = c("WCS Chip Number" = "#cnum"))
kay_consensus_plot <- ggplot(kay_highest_consenus,
aes(x = H, y = factor(V),
colour = Term, size = `% of Subjects`)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_colour_manual(name = "Term",values = graph_colors) +
scale_y_discrete(limits = rev(levels(factor(kay_highest_consenus$V)))) +
theme_bw()Original plot displaying 330 chips by their V and H coordinates
Levels of entropy across chips?
num_categories <- n_distinct(naming_consensus$`Color Term`)
naming_entropy <- naming_consensus %>%
mutate(`% of Subjects` = `% of Subjects`/100) %>%
spread(key = 'Color Term', value = '% of Subjects', fill = 0) %>%
mutate_if(is.double, funs( . * log(., base = num_categories))) %>%
mutate_if(is.double, funs(replace(., is.nan(.), 0))) %>%
ungroup() %>%
mutate(Entropy = -rowSums(.[-1])) %>%
select(`Chip ID`, Entropy) %>%
left_join(color_chip_data, by = c("Chip ID" = "#cnum"))
entropy_plot <- ggplot(naming_entropy,
aes(x = H, y = factor(V),
colour = hex, size = Entropy)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_colour_manual(values = eval(parse(text = paste0("c(", paste0("'", naming_entropy$hex, "' = '", naming_entropy$hex, "'", collapse = ", "),")")))) +
scale_y_discrete(limits = rev(levels(factor(naming_entropy$V)))) +
theme_bw() +
theme(legend.position = "none")
kay_num_categories <- n_distinct(kay_terms$Term)
kay_naming_entropy <- kay_consensus %>%
mutate(`% of Subjects` = `% of Subjects`/100) %>%
spread(key = 'Term', value = '% of Subjects', fill = 0) %>%
mutate_if(is.double, funs( . * log(., base = kay_num_categories))) %>%
mutate_if(is.double, funs(replace(., is.nan(.), 0))) %>%
ungroup() %>%
mutate(Entropy = -rowSums(.[-1])) %>%
select(`WCS Chip Number`, Entropy) %>%
left_join(color_chip_data, by = c("WCS Chip Number" = "#cnum"))
kay_entropy_plot <- ggplot(kay_naming_entropy,
aes(x = H, y = factor(V),
colour = hex, size = Entropy)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_colour_manual(values = eval(parse(text = paste0("c(", paste0("'", kay_naming_entropy$hex, "' = '", kay_naming_entropy$hex, "'", collapse = ", "),")")))) +
scale_y_discrete(limits = rev(levels(factor(kay_naming_entropy$V)))) +
theme_bw() +
theme(legend.position = "none")
entropy_change <- full_join(select(naming_entropy, `Chip ID`, Entropy),
select(kay_naming_entropy, `WCS Chip Number`, Entropy),
by = c("Chip ID" = "WCS Chip Number"),
suffix = c(" (Current)", " (Kay)")) %>%
mutate(`Entropy Change` = `Entropy (Current)` - `Entropy (Kay)`) %>%
left_join(color_chip_data, by = c("Chip ID" = "#cnum"))
entropy_change_plot <- ggplot(entropy_change,
aes(x = H, y = factor(V),
colour = `Entropy Change`, size = `Entropy Change`)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_colour_distiller(palette = "RdBu") +
scale_y_discrete(limits = rev(levels(factor(entropy_change$V)))) +
theme_bw() +
theme(legend.position = "none")
spanish_use <- naming_consensus %>%
mutate(`Color Term` = ifelse(as.character(`Color Term`) %in% spanish_terms,
'Spanish Term', as.character(`Color Term`))) %>%
group_by(`Chip ID`, `Color Term`) %>%
summarise(`% of Subjects` = sum(`% of Subjects`, na.rm = T)) %>%
group_by(`Chip ID`) %>%
spread(key = `Color Term`, value = `% of Subjects`, fill = 0) %>%
select(`Chip ID`, `Spanish Term`) %>%
left_join(color_chip_data, by = c("Chip ID" = "#cnum"))
spanish_use_plot <- ggplot(spanish_use,
aes(x = H, y = factor(V),
colour = `Spanish Term`, size = `Spanish Term`)) +
geom_point() +
scale_size(range = c(0, 2.5)) +
scale_colour_distiller(palette = "RdBu") +
scale_y_discrete(limits = rev(levels(factor(entropy_change$V)))) +
theme_bw() +
theme(legend.position = "none")Is there a similar amount of consensus on labeling between children and adults (in Shipibo)?
shipibo_1st_response <- shipibo_child_data %>%
mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
filter(task == 1) %>%
mutate(response_1 = ifelse(response_1 %in% unlist(spelling_list),
response_1, NA)) %>%
mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list,
"forcats::fct_collapse(response_1, x)")))
) %>%
mutate( age_ints = round(age/shipibo_age_interval)*shipibo_age_interval) %>%
select(subj, age, age_ints, prompt, response_1) %>%
split(.$age_ints) %>%
map_df(function(x) {
x %>%
mutate(response_1 = as.character(response_1)) %>%
spread(prompt, response_1, fill = 'No Response') %>%
gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
group_by(age_ints, prompt, response) %>%
summarise(n = n()) %>%
group_by(age_ints, prompt) %>%
mutate(perc = 100*n/sum(n), n_total = sum(n))
}) %>% ungroup() %>%
mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
`1` = c('celeste'),
`234` = c('verde'),
`245` = c('rojo'),
`274` = c('blanco'),
`297` = c('amarillo'),
`312` = c('negro'),
`320` = c('mierda sol'),
`325` = c('morado'))))) %>%
left_join(color_chip_hexes,
by = c("prompt" = "#cnum")) %>%
rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response,
`% of Subjects` = perc, `Hex Code` = hex) %>%
filter(n_total >= 4)
adult_naming <- naming_consensus %>%
group_by(`Color Term`) %>%
mutate(Age = 18) %>%
arrange(`Chip ID`, `Color Term`) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
filter(`Chip ID` %in% shipibo_chip_set$chip_id & !is.na(`Color Term`)) %>%
dplyr::rename(`Hex Code` = hex)
naming_data_combined <- bind_rows(shipibo_1st_response, adult_naming) %>%
mutate(language = ifelse(`Color Term` %in% spanish_terms, 'Spanish',
ifelse(`Color Term` %in% shipibo_terms, 'Shipibo', NA)))
term_prototypes <- naming_consensus %>%
group_by(`Color Term`) %>%
dplyr::arrange(`Color Term`, desc(`% of Subjects`)) %>%
slice(1:3) %>%
left_join(color_chip_hexes,
by = c("Chip ID" = "#cnum")) %>%
dplyr::rename(`Hex Code` = hex)
shipibo_chip_set_data <- color_chip_data %>%
filter(`#cnum` %in% shipibo_chip_set$chip_id) %>%
select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
arrange(`#cnum`) %>%
rename(`Chip ID` = `#cnum`, `Hex Code` = hex)
datatable(shipibo_chip_set_data, rownames = FALSE) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(shipibo_chip_set_data$`Hex Code`, shipibo_chip_set_data$`Hex Code`))Spanish term as a dotted line?
p <- ggplot(filter(naming_data_combined, Age < 18 & language == 'Shipibo'),
aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
facet_wrap(~`Chip ID`) +
geom_line(size = 1) +
geom_line(data = filter(naming_data_combined,
Age < 18 & language == 'Spanish'),
linetype = 2, size = 1) +
geom_point(data = filter(naming_data_combined,
Age < 18 & language == 'Spanish'),
shape = 17, size = 3) +
geom_point( size=3) +
geom_point(data = filter(naming_data_combined, Age >= 18), size=3) +
scale_y_continuous(limits = c(-10,110), breaks = seq(0,100, by = 25)) +
scale_x_continuous(breaks = c(seq(6,12,2),18), labels = c(seq(6,12,2),'Adult')) +
scale_colour_manual(name = "Color Term",values = graph_colors) +
theme_bw() +
theme(panel.grid = element_blank())
ggplotly(p)spanish_chip_set_data <- color_chip_data %>%
filter(`#cnum` %in% spanish_chip_set$chip_id) %>%
select(`#cnum`, `L*`, `a*`, `b*`, hex) %>%
arrange(`#cnum`) %>%
rename(`Chip ID` = `#cnum`, `Hex Code` = hex)
datatable(spanish_chip_set_data, rownames = FALSE, options = list(pageLength = nrow(spanish_chip_set_data))) %>%
formatStyle(columns = "Hex Code",
background = styleEqual(spanish_chip_set_data$`Hex Code`, spanish_chip_set_data$`Hex Code`))spanish_1st_response <- spanish_child_data %>%
mutate(age = ifelse(is.na(age), as.numeric(as.character(edad)), as.numeric(as.character(age)))) %>%
filter(task == 1) %>%
mutate(response_1 = ifelse(tolower(response_1) %in% unlist(spelling_list),
tolower(response_1), NA)) %>%
mutate(response_1 = eval( parse(text = gsub(pattern = "x", replacement = string_spelling_list, "forcats::fct_collapse(response_1, x)")))
) %>%
mutate( age_ints = round(age/spanish_age_interval)*spanish_age_interval) %>%
select(subj, age, age_ints, prompt, response_1) %>%
split(.$age_ints) %>%
map_df(function(x) {
x %>%
mutate(response_1 = as.character(response_1)) %>%
spread(prompt, response_1, fill = 'No Response') %>%
gather(key = 'prompt', value = 'response', -subj, -age, -age_ints) %>%
group_by(age_ints, prompt, response) %>%
summarise(n = n()) %>%
group_by(age_ints, prompt) %>%
mutate(perc = 100*n/sum(n), n_total = sum(n))
}) %>% ungroup() %>%
mutate(prompt = as.numeric(as.character(forcats::fct_collapse(prompt,
`297` = c('AM'),
`291` = c('AZ'),
`274` = c('BL'),
`46` = c('GR'),
`325` = c('MRD'),
`266` = c('MRN'),
`312` = c('NG'),
`121` = c('NR'),
`245` = c('RJ'),
`65` = c('RS'),
`234` = c('VD'))))) %>%
left_join(color_chip_hexes,
by = c("prompt" = "#cnum")) %>%
rename(Age = age_ints, `Chip ID` = prompt, `Color Term` = response,
`% of Subjects` = perc, `Hex Code` = hex) %>%
filter(n_total >= 4) %>%
mutate(language = ifelse(`Color Term` %in% spanish_terms, 'Spanish', ifelse(`Color Term` %in% shipibo_terms, 'Shipibo', NA)))Characterize consensus over the WCS map
p <- ggplot(filter(spanish_1st_response, language == 'Spanish'),
aes(x = Age, y = `% of Subjects`, group = `Color Term`, colour = `Color Term`)) +
facet_wrap(~`Chip ID`) +
geom_line(size = 1) +
geom_point(size=3) +
geom_line(data = filter(spanish_1st_response, language == 'Shipibo'),
linetype = 2, size = 1) +
geom_point(data = filter(spanish_1st_response, language == 'Shipibo'),
shape = 17, size = 3) +
scale_y_continuous(limits = c(-10,110), breaks = seq(0,100, by = 25)) +
scale_colour_manual(name = "Color Term",values = graph_colors) +
theme_bw() +
theme(panel.grid = element_blank())
ggplotly(p)#
# child_table <- filter(all_child_data, language == 'Shipibo' & task == 'comprehension') %>%
# unite('all_responses', response_1:response_4, sep = ', ', remove = F) %>%
# mutate(all_responses = gsub("(,\\s)?NA", "", all_responses)) %>%
# mutate(all_responses = stringr::str_replace_all(all_responses,
# c('amarillo' = '297',
# 'amarilla' = '297',
# 'blanco' = '274',
# 'celeste' = '1',
# 'joxo' = '274',
# 'miarda(\\sdel sol)?' = '320',
# 'morada' = '325',
# 'morado' = '325',
# 'negro' = '312',
# 'roja' = '245',
# 'rojo' = '245',
# 'verde' = '234'
# ))) %>%
# left_join(shipibo_chip_set %>%
# mutate(shipibo = tolower(shipibo)) %>%
# select(shipibo, chip_id)) %>%
# mutate(correct = ifelse(stringr::str_detect(all_responses, as.character(chip_id)), 1, 0))
#
#
#
# find_correct <- function(child_table, string_spelling_list) {
# if (first(child_table$language) == 'Shipibo') {
#
# if (first(child_table$task) == 'production') {
#
# child_table %<>%
# mutate(response_1 = eval( parse(text = gsub(pattern = "x",
# replacement = string_spelling_list,
# "forcats::fct_collapse(response_1, x)")))) %>%
# mutate(correct = stringr::str_detect(tolower(response_1), tolower(shipibo)))
#
# } else if (first(child_table$task) == 'comprehension') {
# child_table %<>%
# unite('all_responses', response_1:response_4, sep = ', ', remove = F) %>%
# mutate(all_responses = gsub("(,\\s)?NA", "", all_responses)) %>%
# mutate(all_responses = stringr::str_replace_all(all_responses,
# c('amarillo' = '297',
# 'amarilla' = '297',
# 'blanco' = '274',
# 'celeste' = '1',
# 'joxo' = '274',
# 'miarda(\\sdel sol)?' = '320',
# 'morada' = '325',
# 'morado' = '325',
# 'negro' = '312',
# 'roja' = '245',
# 'rojo' = '245',
# 'verde' = '234'
# )))
#
# }
#
# } else if (first(child_table$language) == 'Spanish') {
#
# if (first(child_table$task) == 'production') {
#
# child_table %<>%
# mutate(response_1 = eval( parse(text = gsub(pattern = "x",
# replacement = string_spelling_list,
# "forcats::fct_collapse(response_1, x)")))) %>%
# mutate(correct = stringr::str_detect(tolower(response_1), tolower(spanish)))
#
# } else if (first(child_table$task) == 'comprehension') {
#
# child_table %<>%
# unite('all_responses', response_1:response_4, sep = ', ', remove = F) %>%
# mutate(all_responses = gsub("(,\\s)?NA", "", all_responses)) %>%
# left_join(spanish_chip_set %>%
# mutate(spanish = tolower(spanish)) %>%
# select(spanish, chip_id)) %>%
# mutate(correct = ifelse(stringr::str_detect(all_responses, as.character(chip_id)), 1, 0))
#
# }
# }
# return(child_table)
# }
#
# all_child_data <- bind_rows(mutate(shipibo_child_data, language = 'Shipibo'),
# mutate(spanish_child_data, language = 'Spanish')) %>%
# mutate(age = ifelse(!is.na(age),
# as.numeric(as.character(age)), as.numeric(as.character(edad))),
# task = ifelse(task == 1, 'production',
# ifelse(task == 2, 'comprehension', NA))) %>%
# left_join(children_new_labels, by = c("task", "language", "prompt")) %>%
# mutate(correct = NA) %>%
# select(-edad, -`lugar de grabación`, -`grabación`, -fecha ) %>%
# group_by(language, task) %>%
# do(find_correct)
#
# How does bilingualism factor into adult responses?